|
|
@@ -19,7 +19,7 @@ module Agents
|
19
|
19
|
|
20
|
20
|
`url` can be a single url or an array of urls (for example, for multiple pages with the exact same structure but different content to scrape)
|
21
|
21
|
|
22
|
|
- The WebsiteAgent can also scrape based on incoming events. It will scrape the url contained in the `url` key of the incoming event payload. If you specify `merge` as the `mode`, it will retain the old payload and update it with the new values.
|
|
22
|
+ The WebsiteAgent can also scrape based on incoming events. It will scrape the url contained in the `url` key of the incoming event payload, or, if you set `url_on_receive`, that option is interpolated as a Liquid template to generate the url to access. If you specify `merge` as the `mode`, it will retain the old payload and update it with the new values.
|
23
|
23
|
|
24
|
24
|
# Supported Document Types
|
25
|
25
|
|
|
|
@@ -135,7 +135,8 @@ module Agents
|
135
|
135
|
|
136
|
136
|
def validate_options
|
137
|
137
|
# Check for required fields
|
138
|
|
- errors.add(:base, "url and expected_update_period_in_days are required") unless options['expected_update_period_in_days'].present? && options['url'].present?
|
|
138
|
+ errors.add(:base, "either url or url_on_receive is required") unless options['url'].present? || options['url_on_receive'].present?
|
|
139
|
+ errors.add(:base, "expected_update_period_in_days is required") unless options['expected_update_period_in_days'].present?
|
139
|
140
|
if !options['extract'].present? && extraction_type != "json"
|
140
|
141
|
errors.add(:base, "extract is required for all types except json")
|
141
|
142
|
end
|
|
|
@@ -257,7 +258,12 @@ module Agents
|
257
|
258
|
def receive(incoming_events)
|
258
|
259
|
incoming_events.each do |event|
|
259
|
260
|
interpolate_with(event) do
|
260
|
|
- url_to_scrape = event.payload['url']
|
|
261
|
+ url_to_scrape =
|
|
262
|
+ if url_template = options['url_on_receive'].presence
|
|
263
|
+ interpolate_string(url_template)
|
|
264
|
+ else
|
|
265
|
+ event.payload['url']
|
|
266
|
+ end
|
261
|
267
|
check_url(url_to_scrape,
|
262
|
268
|
interpolated['mode'].to_s == "merge" ? event.payload : {})
|
263
|
269
|
end
|